"""
    由于网站改版，原有作业改为使用html文件读取，另外homework1b则尝试提取主页右侧导航栏
"""

import lxml.etree as le

# Read the saved HTML page as text. errors='ignore' drops any bytes that are
# not valid UTF-8 instead of raising a decode error.
with open('edu.html', 'r', encoding='utf-8', errors='ignore') as f:
    html_ = f.read()

# The file is only needed for the read above, so parsing and printing happen
# after it has been closed.
# Parse the markup into an lxml element tree.
html_x = le.HTML(html_)
# Select every div carrying the class "classify_cList"; each one is treated as
# a level-1 category together with its level-2 categories.
div_x_s = html_x.xpath('//div[@class="classify_cList"]')

# One dict per level-1 category:
#   {'category1': <name>, 'category2_s': [<level-2 names>...]}
data_l = []
for div_x in div_x_s:
    # Level-1 category name: the first text node under ./h3/a.
    # NOTE: raises IndexError if the div has no such text node.
    category1 = div_x.xpath('./h3/a/text()')[0]
    # All level-2 category names that belong to this level-1 category.
    category2_s = div_x.xpath('./div/span/a/text()')
    data_l.append(
        {
            'category1': category1,
            'category2_s': category2_s,
        }
    )

# Print each level-1 category followed by its own level-2 categories,
# indented with a tab.
for data in data_l:
    print(data.get('category1'))
    # Iterate the list stored on *this* entry. Using the leftover loop variable
    # `category2_s` here would print the last category's children under every
    # level-1 heading.
    for category2 in data.get('category2_s'):
        print('\t{}'.format(category2))